# Packages and dependence
packageCheckClassic <- function(x){
#
for( i in x ){
if( ! require( i , character.only = TRUE ) ){
install.packages( i , dependencies = TRUE )
require( i , character.only = TRUE )
}
}
}
packageCheckClassic(c('gridExtra','DESeq2','adegenet','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: gridExtra
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
##
## score
## The following object is masked from 'package:BiocGenerics':
##
## score
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.18'
## is available with R version '4.3'; see https://bioconductor.org/install
##
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
##
## install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
##
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
##
## rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: vegan
## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
##
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
##
## plotMA
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (69c36ce6) has not changed since last install.
## Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('pairwiseAdonis')
## Loading required package: cluster
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Functions
trimFunction <- function(resLFC,p_adj_cut,lfc_cut) {
resOrdered<-resLFC[order(resLFC$padj),]
y<-na.omit(resOrdered)
resTrim<-y[y$padj < p_adj_cut,]
resTrim <- resTrim[ which( resTrim$log2FoldChange > lfc_cut | resTrim$log2FoldChange < -lfc_cut), ]
resTrim <- as.data.frame(resTrim)
resTrim$genes <- rownames(resTrim)
return(resTrim)
}
applyAnnot <- function(inputDf,inputDfAnnot) {
gene_and_annot_col <- character(0)
inputDf_annot_genes <- inputDfAnnot$genes
inputDf_annot_pfam <- inputDfAnnot$pfam_annotation
for (i in 1:length(inputDf_annot_genes)) {
gene_and_annot_col <- c(gene_and_annot_col,
paste(inputDf_annot_genes[i], " - ", inputDf_annot_pfam[i]))
}
inputDf_genes <- inputDf$genes
new_gene_annot_col <- character(0)
for (i in inputDf_genes) {
gene <- i
for (j in gene_and_annot_col) {
j_split <- strsplit(j, " ", fixed = TRUE)[[1]]
if (i %in% j_split[1]) {
gene <- j
}
}
new_gene_annot_col <- c(new_gene_annot_col, gene)
}
inputDf$genes <- new_gene_annot_col
return(inputDf)
}
heatmapFunction <- function(newColNames,commonGenes,commonGenesAll,vsd,nameCode) {
vsdCommonGm <- vsd[commonGenes$genes,]
vsdCommonGm@colData@rownames = newColNames
annot_genes = commonGenesAll$genes
genes = names(vsdCommonGm)
new_names = list()
for (i in annot_genes) {
gene = i
for (j in genes) {
if (i == j) {
gene = j
}
}
new_names <- c(new_names,gene)
}
names(vsdCommonGm) <- new_names
pheatmap(assay(vsdCommonGm),main=nameCode,scale="row", cluster_rows=T, show_rownames=T,
cluster_cols=FALSE,cellwidth = 20,fontsize_row = 8)
return(vsdCommonGm)
}
similarityFunction <- function(dataset1,dataset2) {
c = 0
for (i in rownames(dataset1)) {
if (i %in% rownames(dataset2)) {
c = c + 1
}
}
sim = (c / as.integer(length(rownames(dataset2)))) * 100
return(sim)
}
#### May 2018 ####
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_may2018_inversed.txt',header=T)
samplesPV<-read.table('tximport_design_PV_may2018_inversed.txt',header=T)
samplesGM<-read.table('tximport_design_GM_may2018_inversed.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("may2018_sp_sp|may2018_gm_sp", colnames(raw_counts))]
raw_counts_pv <- raw_counts[,grep("may2018_pv_pv|may2018_gm_pv", colnames(raw_counts))]
raw_counts_gm <- raw_counts[,grep("may2018_gm_gm|may2018_sp_gm|may2018_pv_gm", colnames(raw_counts))]
#samples_order_sp = c(5,6,7,8,9,10,11,12,1,2,3,4)
#samples_order_pv = c(6,7,8,9,10,11,12,13,14,1,2,3,4,5)
#samples_order_gm = c(1,2,3,4,5,6,7,8,14,15,16,17,18,9,10,11,12,13)
#raw_counts_sp <- raw_counts_sp[,samples_order_sp]
#raw_counts_pv <- raw_counts_pv[,samples_order_pv]
#raw_counts_gm <- raw_counts_gm[,samples_order_gm]
#### SP ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_sp_trt"
## [3,] "originSite_finalSite_experiment_sp_sp_tro_vs_gm_sp_trt"
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_tro"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_tro_lfc2_intra,0.05,1)
sp_sp_bck_VS_gm_sp_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_sp_trt"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_gm_sp_trt_lfc2_intra<-trimFunction(sp_sp_bck_VS_gm_sp_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(sp_sp_bck_VS_gm_sp_trt_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.17677 - UPAR_LY6
## 2 STRG.2411 - p450
## 3 STRG.6873 - Ank
## 4 STRG.40812
## 5 STRG.30087 - DUF4795
## 6 STRG.40633 - L51_S25_CI-B8
## 7 STRG.24627 - Na_Ca_ex
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - SP") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 12627 0.17386 1.0522 0.297
## Residual 10 60003 0.82614
## Total 12 72631 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 sp_sp_bck vs sp_sp_tro 1 5061.903 0.9833413 0.1408124 0.592 1.000
## 2 sp_sp_bck vs gm_sp_trt 1 6885.942 1.0566985 0.1497440 0.283 0.849
## 3 sp_sp_tro vs gm_sp_trt 1 6857.458 1.0967089 0.1205611 0.196 0.588
## sig
## 1
## 2
## 3
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## sp_sp_bck-gm_sp_trt -17.440163 -42.43355 7.553227 0.1853302
## sp_sp_tro-gm_sp_trt -10.375568 -32.02048 11.269343 0.4195501
## sp_sp_tro-sp_sp_bck 7.064595 -17.92879 32.057985 0.7260918
#### PV ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_pv_trt"
## [3,] "originSite_finalSite_experiment_pv_pv_tro_vs_gm_pv_trt"
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_tro"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_tro_lfc2_intra,0.05,1)
pv_pv_bck_VS_gm_pv_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_pv_trt"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_gm_pv_trt_lfc2_intra<-trimFunction(pv_pv_bck_VS_gm_pv_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(pv_pv_bck_VS_gm_pv_trt_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.41006
## 2 STRG.38928
## 3 STRG.38500
## 4 STRG.28623 - Sushi
## 5 STRG.19337 - Pkinase
## 6 STRG.12476
## 7 STRG.25239 - EGF
## 8 STRG.21464
## 9 STRG.37592 - Aminotran_3
## 10 STRG.23879
## 11 STRG.32899 - zf-MYND
## 12 STRG.38652 - Sushi
## 13 STRG.31485
## 14 STRG.41361 - I-set
## 15 STRG.1980 - Pkinase
## 16 STRG.39997
## 17 STRG.7292 - fn3
## 18 STRG.41855
## 19 STRG.20559 - UPA
## 20 STRG.27513 - Zona_pellucida
## 21 STRG.2863
## 22 STRG.8151
## 23 STRG.2883
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - PV") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 10187 0.15671 1.0221 0.372
## Residual 11 54814 0.84329
## Total 13 65001 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 pv_pv_bck vs pv_pv_tro 1 4941.366 0.9603022 0.1206364 0.563 1.000
## 2 pv_pv_bck vs gm_pv_trt 1 5368.479 1.1247097 0.1578604 0.228 0.684
## 3 pv_pv_tro vs gm_pv_trt 1 5017.357 1.0041443 0.1003728 0.464 1.000
## sig
## 1
## 2
## 3
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-gm_pv_trt -4.109184 -21.894782 13.67641 0.8102582
## pv_pv_tro-gm_pv_trt 4.560327 -10.186712 19.30737 0.6899343
## pv_pv_tro-pv_pv_bck 8.669511 -8.551321 25.89034 0.3938340
#### GM ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_tro_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_gm_trt_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_sp_gm_trt_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_tro"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_tro_lfc2_intra,0.05,1)
gm_gm_bck_VS_sp_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_gm_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_sp_gm_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_sp_gm_trt_lfc2_intra,0.05,1)
gm_gm_bck_VS_pv_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_gm_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_pv_gm_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_pv_gm_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_tro_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_sp_gm_trt_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_pv_gm_trt_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.3820 - TNF
## 2 STRG.15249 - DNA_pol_B_2
## 3 STRG.19979
## 4 STRG.6534 - DERM
## 5 STRG.33033 - UPAR_LY6
## 6 STRG.34473 - ATP-synt_ab
## 7 STRG.16889 - EGF
## 8 STRG.24509 - ATG27
## 9 STRG.19197 - zf-TRAF
## 10 STRG.40723 - Bax1-I
## 11 STRG.36683 - CAP_GLY
## 12 STRG.27219
## 13 STRG.12145 - F5_F8_type_C
## 14 STRG.31012 - Filamin
## 15 STRG.32554
## 16 STRG.33979 - Cytochrom_B561
## 17 STRG.36363
## 18 STRG.35320 - TNF
## 19 STRG.26700
## 20 STRG.17139 - FAD_binding_1
## 21 STRG.36671
## 22 STRG.5029 - Helicase_C_2
## 23 STRG.21464
## 24 STRG.40720 - COR
## 25 STRG.19337 - Pkinase
## 26 STRG.14152
## 27 STRG.32261 - I-set
## 28 STRG.33364 - SDF
## 29 STRG.39645 - EGF
## 30 STRG.41459
## 31 STRG.28658 - LUC7
## 32 STRG.40373 - cEGF
## 33 STRG.42968 - DOMON
## 34 STRG.24860 - Collagen
## 35 STRG.34745 - fn3
## 36 STRG.3620 - zf-CCHC_6
## 37 STRG.24070
## 38 STRG.30013
## 39 STRG.9452 - CAP_GLY
## 40 STRG.40542 - Phage_integrase
## 41 STRG.39050 - DUF3990
## 42 STRG.39049 - DUF3990
## 43 STRG.12227
## 44 STRG.14413
## 45 STRG.30824 - Amino_oxidase
## 46 STRG.35964 - Beta_helix
## 47 STRG.955
## 48 STRG.35123
## 49 STRG.12039 - Pkinase
## 50 STRG.6873 - Ank
## 51 STRG.34377
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - GM") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 3 13991 0.20697 1.1309 0.153
## Residual 13 53609 0.79303
## Total 16 67600 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_tro 1 3550.331 1.0202444 0.1453289 0.444 1.000
## 2 gm_gm_bck vs sp_gm_trt 1 4915.474 1.2853949 0.2045050 0.218 1.000
## 3 gm_gm_bck vs pv_gm_trt 1 4025.316 0.9543117 0.1372259 0.595 1.000
## 4 gm_gm_tro vs sp_gm_trt 1 5824.720 1.4406988 0.1706848 0.101 0.606
## 5 gm_gm_tro vs pv_gm_trt 1 5112.247 1.1858412 0.1290945 0.168 1.000
## 6 sp_gm_trt vs pv_gm_trt 1 4365.598 0.9336818 0.1176858 0.584 1.000
## sig
## 1
## 2
## 3
## 4
## 5
## 6
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_tro-gm_gm_bck 10.547822 -3.1222831 24.217926 0.1573278
## pv_gm_trt-gm_gm_bck 17.941110 4.2710049 31.611214 0.0094459
## sp_gm_trt-gm_gm_bck 13.399350 -0.8971875 27.695888 0.0692403
## pv_gm_trt-gm_gm_tro 7.393288 -4.4453699 19.231946 0.3024017
## sp_gm_trt-gm_gm_tro 2.851529 -9.7052644 15.408321 0.9077883
## sp_gm_trt-pv_gm_trt -4.541759 -17.0985524 8.015033 0.7176656
#### Sept2018 ####
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_sept2018_inversed.txt',header=T)
samplesPV<-read.table('tximport_design_PV_sept2018_inversed.txt',header=T)
samplesGM<-read.table('tximport_design_GM_sept2018_inversed.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("sept2018_sp_sp|sept2018_gm_sp", colnames(raw_counts))]
raw_counts_pv <- raw_counts[,grep("sept2018_pv_pv|sept2018_gm_pv", colnames(raw_counts))]
raw_counts_gm <- raw_counts[,grep("sept2018_gm_gm|sept2018_sp_gm|sept2018_pv_gm", colnames(raw_counts))]
#samples_order_sp = c(8,9,10,11,12,13,14,15,16,1,2,3,4,5,6,7)
#samples_order_pv = c(7,8,9,10,11,12,13,14,15,1,2,3,4,5,6)
#samples_order_gm = c(1,2,3,4,5,6,7,8,9,10,18,19,20,21,22,23,24,11,12,13,14,15,16,17)
#raw_counts_sp <- raw_counts_sp[,samples_order_sp]
#raw_counts_pv <- raw_counts_pv[,samples_order_pv]
#raw_counts_gm <- raw_counts_gm[,samples_order_gm]
#### SP ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 151 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_gm_sp_gas"
## [3,] "originSite_finalSite_experiment_sp_sp_gas_vs_gm_sp_gas"
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_gas_lfc2_intra,0.05,1)
sp_sp_bck_VS_gm_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","gm_sp_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_gm_sp_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_gm_sp_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(sp_sp_bck_VS_gm_sp_gas_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.9055
## 2 STRG.34841 - SEA
## 3 STRG.34850 - BEN
## 4 STRG.13067 - Peptidase_C14
## 5 STRG.32612
## 6 STRG.34749 - CHAT
## 7 STRG.2331
## 8 STRG.36150
## 9 STRG.25239 - EGF
## 10 STRG.15785 - HCO3_cotransp
## 11 STRG.35964 - Beta_helix
## 12 STRG.40146
## 13 STRG.27187 - A2M
## 14 STRG.32829
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - SP") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 11772 0.1439 1.0926 0.271
## Residual 13 70034 0.8561
## Total 15 81806 1.0000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 sp_sp_bck vs sp_sp_gas 1 4546.240 0.8842399 0.11215284 0.643 1.000
## 2 sp_sp_bck vs gm_sp_gas 1 6251.666 1.1727958 0.12785587 0.249 0.747
## 3 sp_sp_gas vs gm_sp_gas 1 6531.885 1.1695675 0.09610592 0.216 0.648
## sig
## 1
## 2
## 3
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## sp_sp_bck-gm_sp_gas -15.703378 -31.369793 -0.03696242 0.0494400
## sp_sp_gas-gm_sp_gas -1.768251 -14.398919 10.86241680 0.9278572
## sp_sp_gas-sp_sp_bck 13.935127 -2.118172 29.98842524 0.0926770
#### PV ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 142 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_gm_pv_gas"
## [3,] "originSite_finalSite_experiment_pv_pv_gas_vs_gm_pv_gas"
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_gas_lfc2_intra,0.05,1)
pv_pv_bck_VS_gm_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","gm_pv_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_gm_pv_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_gm_pv_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(pv_pv_bck_VS_gm_pv_gas_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.23232 - zf-C3HC4
## 2 STRG.39997
## 3 STRG.22973
## 4 STRG.18537 - BRICHOS
## 5 STRG.39645 - EGF
## 6 STRG.42631 - Bcl-2
## 7 STRG.34768 - TMEM220
## 8 STRG.19185
## 9 STRG.41006
## 10 STRG.5456 - DUF3987
## 11 STRG.6156 - fn3
## 12 STRG.28262 - Laminin_B
## 13 STRG.4831
## 14 STRG.3197
## 15 STRG.37728 - Laminin_G_3
## 16 STRG.679 - HECT
## 17 STRG.42790 - Fringe
## 18 STRG.5258 - Amino_oxidase
## 19 STRG.36828 - PALP
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - PV") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 13018 0.18276 1.4536 0.012 *
## Residual 13 58214 0.81724
## Total 15 71232 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 pv_pv_bck vs pv_pv_gas 1 4059.201 0.906614 0.1146653 0.661 1.000
## 2 pv_pv_bck vs gm_pv_gas 1 7468.260 1.683977 0.1738931 0.031 0.093
## 3 pv_pv_gas vs gm_pv_gas 1 7467.692 1.655904 0.1308404 0.004 0.012 .
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-gm_pv_gas -8.2040678 -22.624231 6.216096 0.3218336
## pv_pv_gas-gm_pv_gas -0.2839956 -11.909903 11.341912 0.9977094
## pv_pv_gas-pv_pv_bck 7.9200722 -6.856198 22.696343 0.3620039
#### GM ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 138 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_gas_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_pv_gm_gas_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_sp_gm_gas_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_gas_lfc2_intra,0.05,1)
gm_gm_bck_VS_sp_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","sp_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_sp_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_sp_gm_gas_lfc2_intra,0.05,1)
gm_gm_bck_VS_pv_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","pv_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_pv_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_pv_gm_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_gas_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_sp_gm_gas_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_pv_gm_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
rownames_annot_all_intra
## genes
## 1 STRG.16155
## 2 STRG.31389 - HSP70
## 3 STRG.27900 - HSP70
## 4 STRG.41130
## 5 STRG.2331
## 6 STRG.30187 - EF-hand_1
## 7 STRG.14942
## 8 STRG.1286 - Chromo
## 9 STRG.3820 - TNF
## 10 STRG.7740 - fn3
## 11 STRG.42069 - SOUL
## 12 STRG.42790 - Fringe
## 13 STRG.24860 - Collagen
## 14 STRG.22973
## 15 STRG.20559 - UPA
## 16 STRG.15024 - DAZAP2
## 17 STRG.1287
## 18 STRG.14152
## 19 STRG.29420 - Ribosomal_L18A
## 20 STRG.41408 - Pkinase
## 21 STRG.105 - Ribosomal_L6e_N
## 22 STRG.17312 - RT_RNaseH
## 23 STRG.24939 - 2OG-FeII_Oxy
## 24 STRG.22251 - S-methyl_trans
## 25 STRG.24042 - AAA_11
## 26 STRG.1170 - CUB
## 27 STRG.13105 - Cu2_monooxygen
## 28 STRG.24510 - 7tm_3
## 29 STRG.708 - GDA1_CD39
## 30 STRG.30962 - Mei4
## 31 STRG.15023 - MoaC
## 32 STRG.22537 - HLH
## 33 STRG.17988
## 34 STRG.2883
## 35 STRG.6534 - DERM
## 36 STRG.261
## 37 STRG.32553 - UPAR_LY6
## 38 STRG.2113
## 39 STRG.34588 - Collagen
## 40 STRG.41850
## 41 STRG.23759 - MIEAP
## 42 STRG.40373 - cEGF
## 43 STRG.8640 - EGF
## 44 STRG.42227 - RVT_1
## 45 STRG.12613 - Fibrinogen_C
## 46 STRG.27038 - Forkhead
## 47 STRG.11433
## 48 STRG.29417 - NACHT
## 49 STRG.37064 - HSP70
## 50 STRG.18099
## 51 STRG.36750 - VWA
## 52 STRG.119 - MIEAP
## 53 STRG.24933
## 54 STRG.12709 - CBM_14
## 55 STRG.16167 - An_peroxidase
## 56 STRG.29418 - Acyl-CoA_dh_1
## 57 STRG.42225 - MIEAP
## 58 STRG.16136 - RVT_1
## 59 STRG.30013
## 60 STRG.3398 - cEGF
## 61 STRG.37065 - MIEAP
## 62 STRG.40148
## 63 STRG.12662 - EGF_3
## 64 STRG.40142 - HSP70
## 65 STRG.42230 - HSP70
## 66 STRG.40093 - Cu2_monooxygen
## 67 STRG.40094 - Cu2_monoox_C
## 68 STRG.1714 - Leg1
## 69 STRG.18100 - KDZ
## 70 STRG.40147 - MIEAP
## 71 STRG.40647 - Alk_phosphatase
## 72 STRG.13719
## 73 STRG.5475 - I-set
## 74 STRG.41593 - MAPEG
## 75 STRG.35964 - Beta_helix
## 76 STRG.4914 - Astacin
## 77 STRG.42229 - MIEAP
## 78 STRG.27801 - Astacin
## 79 STRG.22986 - EGF
## 80 STRG.28262 - Laminin_B
## 81 STRG.27870 - Phospholip_A2_3
## 82 STRG.18096
## 83 STRG.23259 - SRCR
## 84 STRG.4770 - Anoctamin
## 85 STRG.17793
## 86 STRG.20517 - TSP_1
## 87 STRG.2780 - Amino_oxidase
## 88 STRG.2781 - Amino_oxidase
## 89 STRG.26149
## 90 STRG.14937 - CxC3
## 91 STRG.17200 - F5_F8_type_C
## 92 STRG.32403 - Filamin
## 93 STRG.37066 - MIEAP
## 94 STRG.15029 - DUF1647
## 95 STRG.3466 - DEAD
## 96 STRG.2402
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - GM") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 3 43790 0.23448 1.94 0.001 ***
## Residual 19 142960 0.76552
## Total 22 186750 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm_gm_bck vs gm_gm_gas 1 10658.42 1.330685 0.1426139 0.067 0.402
## 2 gm_gm_bck vs sp_gm_gas 1 19840.62 2.521545 0.2396554 0.008 0.048 .
## 3 gm_gm_bck vs pv_gm_gas 1 14656.40 1.691785 0.1946419 0.054 0.324
## 4 gm_gm_gas vs sp_gm_gas 1 17254.23 2.515277 0.1732848 0.001 0.006 *
## 5 gm_gm_gas vs pv_gm_gas 1 13144.63 1.807099 0.1411014 0.002 0.012 .
## 6 sp_gm_gas vs pv_gm_gas 1 12141.07 1.693049 0.1333839 0.001 0.006 *
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_gas-gm_gm_bck -7.442902 -31.42738 16.54157 0.8188379
## pv_gm_gas-gm_gm_bck -5.248574 -29.82535 19.32820 0.9306434
## sp_gm_gas-gm_gm_bck -8.477402 -32.46187 15.50707 0.7546381
## pv_gm_gas-gm_gm_gas 2.194328 -17.14257 21.53123 0.9883982
## sp_gm_gas-gm_gm_gas -1.034500 -19.61279 17.54379 0.9985829
## sp_gm_gas-pv_gm_gas -3.228828 -22.56573 16.10807 0.9648532